Matrix Factorization

source: here

$$ R \simeq PQ = \hat{R} $$

In [2]:
import numpy

error

scalar indicates the difference between $$R \ and \ \hat{R}$$

regularization term

scalar $$ \lambda ( \sum_{i}^{} \|x_i\| ^2 + \sum_{j}^{} \|y_j\| ^2 ) $$


In [5]:
def error(P, Q, i, j):
    return R[i][j] - numpy.dot(P[i,:],Q[:,j])


def regular(beta, P, Q, i, k, j):
    return (beta/2) * (pow(P[i][k], 2) + pow(Q[k][j], 2))


def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    Q = Q.T
    for step in range(steps):
        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] > 0:
                    for k in range(K):
                        P[i][k] = P[i][k] + alpha * (2 * error(P, Q, i, j)
                                                     * Q[k][j] - beta * P[i][k])
                        Q[k][j] = Q[k][j] + alpha * (2 * error(P, Q, i, j) 
                                                     * P[i][k] - beta * Q[k][j])
        
        
        eR = numpy.dot(P,Q)
        e = 0
        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] > 0:
                    e = e + error(P, Q, i, j) ** 2
                    for k in range(K):                      
                        e = e + regular(beta, P, Q, i, k, j)
        if e < 0.001:
            break
    return P, Q.T

Experiments


In [6]:
R = [
     [5,3,0,1],
     [4,0,0,1],
     [1,1,0,5],
     [1,0,0,4],
     [0,1,5,4],
    ]

R = numpy.array(R)

N = len(R)
M = len(R[0])

# number of dimension about factor vector
K = 2

P = numpy.random.rand(N,K)
Q = numpy.random.rand(M,K)

nP, nQ = matrix_factorization(R, P, Q, K)
nR = numpy.dot(nP, nQ.T)

print(nR)


[[ 5.04412737  2.79980887  5.61699272  0.99512797]
 [ 3.92353281  2.17961936  4.49568822  1.00110353]
 [ 1.12168842  0.66044396  3.89292911  4.96453571]
 [ 0.93934042  0.55163335  3.15909328  3.97630913]
 [ 2.59586174  1.46892927  4.85156314  4.03008701]]